# Multi-page video scraping for pearvideo.com (Pear Video)
import requests
from bs4 import BeautifulSoup
import os
import time

# Create the output directory for downloaded videos. makedirs with
# exist_ok avoids the check-then-create race of os.path.exists + os.mkdir.
os.makedirs(r'梨视频数据', exist_ok=True)


def get_video(n):
    """Download every video on one page of the Pear Video category listing.

    Args:
        n: Pagination offset sent as the ``start`` query parameter
           (the listing serves 12 items per page, so n is 12, 24, 36, ...).

    Files are written to the ``梨视频数据`` directory as ``<video_id>.mp4``.
    A failure on a single video (network error, unexpected JSON) is
    reported and skipped so the rest of the page still downloads.
    """
    res = requests.get(
        'https://www.pearvideo.com/category_loading.jsp?reqType=5&categoryId=31&start=%s' % n,
        timeout=10,  # never hang indefinitely on a stalled connection
    )
    soup = BeautifulSoup(res.text, 'lxml')
    li_list = soup.select('li.categoryem')
    # Each <li> wraps one video; its first <a> href looks like "video_1742158".
    for li in li_list:
        a_tag = li.find(name='a')
        if a_tag is None:
            # Defensive: skip malformed list items instead of crashing.
            continue
        a_href_link = a_tag.get('href')  # e.g. video_1742158
        video_id = a_href_link.split('_')[-1]
        # Anti-hotlinking: the status endpoint rejects requests whose
        # Referer is not the video's own page.
        headers = {
            "Referer": "https://www.pearvideo.com/video_%s" % video_id
        }
        try:
            res1 = requests.get('https://www.pearvideo.com/videoStatus.jsp',
                                params={'contId': video_id},
                                headers=headers,
                                timeout=10,
                                )
            data_dict = res1.json()
            src_url = data_dict['videoInfo']['videos']['srcUrl']
            systemTime = data_dict['systemTime']
        except (requests.RequestException, ValueError, KeyError) as e:
            # Skip this video but keep processing the rest of the page.
            print('skipping video %s: %s' % (video_id, e))
            continue
        # The API returns a decoy URL embedding a server timestamp; the
        # real CDN URL is obtained by replacing it with "cont-<id>".
        real_url = src_url.replace(systemTime, 'cont-%s' % video_id)

        file_path = os.path.join(r'梨视频数据', '%s.mp4' % video_id)
        try:
            # Stream to disk so large videos are not buffered in memory.
            with requests.get(real_url, stream=True, timeout=10) as res2:
                with open(file_path, 'wb') as f:
                    for chunk in res2.iter_content(chunk_size=64 * 1024):
                        f.write(chunk)
        except requests.RequestException as e:
            print('download failed for %s: %s' % (video_id, e))
            continue
        time.sleep(0.5)  # be polite to the server between downloads


# Fetch three listing pages; the site paginates 12 items at a time,
# so the start offsets are 12, 24 and 36.
for start_offset in range(12, 48, 12):
    get_video(start_offset)